In [4]:
import pandas as pd
import seaborn as sns
In [9]:
import pandas as pd
# Specify the path to the uploaded CSV file
file_path = 'FY_2024_Hospital_Readmissions_Reduction_Program_Hospital (1).csv'
# Load the CSV file into a pandas DataFrame
data = pd.read_csv(file_path)
# Display the first few rows of the DataFrame to ensure it loaded correctly
df.head()
Out[9]:
| Facility Name | Facility ID | State | Measure Name | Number of Discharges | Footnote | Excess Readmission Ratio | Predicted Readmission Rate | Expected Readmission Rate | Number of Readmissions | Start Date | End Date | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | SOUTHEAST HEALTH MEDICAL CENTER | 10001 | AL | READM-30-HIP-KNEE-HRRP | NaN | NaN | 0.8916 | 3.5325 | 3.9618 | Too Few to Report | 7/1/2019 | 6/30/2022 |
| 1 | SOUTHEAST HEALTH MEDICAL CENTER | 10001 | AL | READM-30-HF-HRRP | 616.0 | NaN | 1.1003 | 23.1263 | 21.0184 | 149 | 7/1/2019 | 6/30/2022 |
| 2 | SOUTHEAST HEALTH MEDICAL CENTER | 10001 | AL | READM-30-AMI-HRRP | 274.0 | NaN | 0.9332 | 12.9044 | 13.8283 | 32 | 7/1/2019 | 6/30/2022 |
| 3 | SOUTHEAST HEALTH MEDICAL CENTER | 10001 | AL | READM-30-PN-HRRP | 404.0 | NaN | 0.9871 | 17.0529 | 17.2762 | 68 | 7/1/2019 | 6/30/2022 |
| 4 | SOUTHEAST HEALTH MEDICAL CENTER | 10001 | AL | READM-30-CABG-HRRP | 126.0 | NaN | 0.9517 | 9.8131 | 10.3112 | 11 | 7/1/2019 | 6/30/2022 |
In [10]:
data['Number of Discharges'].fillna(data['Number of Discharges'].median(), inplace=True)
In [11]:
data['Number of Readmissions'] = pd.to_numeric(data['Number of Readmissions'], errors='coerce')
readmissions_by_diagnosis = data.groupby('Measure Name')['Number of Readmissions'].sum()
readmissions_by_diagnosis_sorted = readmissions_by_diagnosis.sort_values(ascending=False)
top_5_diagnoses = readmissions_by_diagnosis_sorted.head(5)
for diagnosis, readmissions in top_5_diagnoses.items():
print(f"{diagnosis}: {int(readmissions):,} readmissions.")
READM-30-HF-HRRP: 148,213 readmissions. READM-30-PN-HRRP: 101,169 readmissions. READM-30-COPD-HRRP: 39,275 readmissions. READM-30-AMI-HRRP: 33,269 readmissions. READM-30-HIP-KNEE-HRRP: 6,721 readmissions.
reads Heart Failure, Pneuomnia, Chronic Obstructive Pulmonary Disease Care, Heart Attack Care, and Total Hip/Knee Arthroplasty
research questions: How do the excess readmission ratio and the predicted readmission rate influence the actual number of readmissions across healthcare facilities in various states?
How can the excess readmission ratio and predicted readmission rate be used as indicators of the actual number of readmissions across healthcare facilities in different states?
In [12]:
data.describe()
data[['Excess Readmission Ratio', 'Predicted Readmission Rate', 'Number of Readmissions']].info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 18774 entries, 0 to 18773 Data columns (total 3 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Excess Readmission Ratio 12077 non-null float64 1 Predicted Readmission Rate 12077 non-null float64 2 Number of Readmissions 7890 non-null float64 dtypes: float64(3) memory usage: 440.1 KB
In [ ]:
correlation_matrix = data[['Excess Readmission Ratio', 'Predicted Readmission Rate', 'Number of Readmissions']].corr()
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
Out[Â ]:
<Axes: >
In [13]:
import plotly.express as px
columns_needed = ['State', 'Measure Name', 'Number of Readmissions']
data_cleaned = data[columns_needed]
data_cleaned['Number of Readmissions'] = pd.to_numeric(data_cleaned['Number of Readmissions'], errors='coerce')
state_agg = data_cleaned.groupby(['State', 'Measure Name'], as_index=False).sum()
state_agg['Rank'] = state_agg.groupby('State')['Number of Readmissions'].rank(method='max', ascending=False)
prevalent_cause = state_agg[state_agg['Rank'] == 1]
map_data = prevalent_cause[['State', 'Measure Name', 'Number of Readmissions']]
map_data.rename(columns={
'Measure Name': 'Most Prevalent Cause',
'Number of Readmissions': 'Total Readmissions'
}, inplace=True)
state_codes = {
'AL': 'Alabama', 'AK': 'Alaska', 'AZ': 'Arizona', 'AR': 'Arkansas',
'CA': 'California', 'CO': 'Colorado', 'CT': 'Connecticut', 'DE': 'Delaware',
'FL': 'Florida', 'GA': 'Georgia', 'HI': 'Hawaii', 'ID': 'Idaho',
'IL': 'Illinois', 'IN': 'Indiana', 'IA': 'Iowa', 'KS': 'Kansas',
'KY': 'Kentucky', 'LA': 'Louisiana', 'ME': 'Maine', 'MD': 'Maryland',
'MA': 'Massachusetts', 'MI': 'Michigan', 'MN': 'Minnesota', 'MS': 'Mississippi',
'MO': 'Missouri', 'MT': 'Montana', 'NE': 'Nebraska', 'NV': 'Nevada',
'NH': 'New Hampshire', 'NJ': 'New Jersey', 'NM': 'New Mexico', 'NY': 'New York',
'NC': 'North Carolina', 'ND': 'North Dakota', 'OH': 'Ohio', 'OK': 'Oklahoma',
'OR': 'Oregon', 'PA': 'Pennsylvania', 'RI': 'Rhode Island', 'SC': 'South Carolina',
'SD': 'South Dakota', 'TN': 'Tennessee', 'TX': 'Texas', 'UT': 'Utah',
'VT': 'Vermont', 'VA': 'Virginia', 'WA': 'Washington', 'WV': 'West Virginia',
'WI': 'Wisconsin', 'WY': 'Wyoming', 'DC': 'District of Columbia'
}
map_data['State Name'] = map_data['State'].map(state_codes)
regions = {
'Midwest': [
'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Michigan', 'Minnesota',
'Missouri', 'Nebraska', 'North Dakota', 'Ohio', 'South Dakota', 'Wisconsin'
],
'West': [
'Alaska', 'California', 'Colorado', 'Hawaii', 'Idaho', 'Montana',
'Nevada', 'Oregon', 'Utah', 'Washington', 'Wyoming'
],
'Southeast': [
'Alabama', 'Arkansas', 'Florida', 'Georgia', 'Kentucky', 'Louisiana',
'Mississippi', 'North Carolina', 'South Carolina', 'Tennessee', 'Virginia',
'West Virginia'
],
'Southwest': [
'Arizona', 'New Mexico', 'Oklahoma', 'Texas'
],
'Northeast': [
'Connecticut', 'Delaware', 'Maine', 'Maryland', 'Massachusetts',
'New Hampshire', 'New Jersey', 'New York', 'Pennsylvania',
'Rhode Island', 'Vermont', 'District of Columbia'
]
}
def assign_region(state_name):
for region, states in regions.items():
if state_name in states:
return region
return 'Unknown'
map_data['Region'] = map_data['State Name'].apply(assign_region)
fig = px.choropleth(
map_data,
locations='State',
locationmode="USA-states",
color='Most Prevalent Cause',
hover_name='State Name',
hover_data={
'Total Readmissions': True,
'Most Prevalent Cause': True,
'Region': True,
'State': False,
},
scope="usa",
title="Most Prevalent Causes of Readmission by State with Regions",
labels={'Most Prevalent Cause': 'Prevalent Cause'}
)
fig.update_layout(geo=dict(bgcolor='rgba(0,0,0,0)'))
fig.show()
C:\Users\joeth\AppData\Local\Temp\ipykernel_23592\2662286663.py:6: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
data_cleaned['Number of Readmissions'] = pd.to_numeric(data_cleaned['Number of Readmissions'], errors='coerce')
C:\Users\joeth\AppData\Local\Temp\ipykernel_23592\2662286663.py:14: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
map_data.rename(columns={
In [14]:
columns_needed = ['Facility Name', 'State', 'Measure Name', 'Predicted Readmission Rate', 'Expected Readmission Rate']
scatter_data = data[columns_needed].dropna()
scatter_data['Predicted Readmission Rate'] = pd.to_numeric(scatter_data['Predicted Readmission Rate'], errors='coerce')
scatter_data['Expected Readmission Rate'] = pd.to_numeric(scatter_data['Expected Readmission Rate'], errors='coerce')
state_codes = {
'AL': 'Alabama', 'AK': 'Alaska', 'AZ': 'Arizona', 'AR': 'Arkansas',
'CA': 'California', 'CO': 'Colorado', 'CT': 'Connecticut', 'DE': 'Delaware',
'FL': 'Florida', 'GA': 'Georgia', 'HI': 'Hawaii', 'ID': 'Idaho',
'IL': 'Illinois', 'IN': 'Indiana', 'IA': 'Iowa', 'KS': 'Kansas',
'KY': 'Kentucky', 'LA': 'Louisiana', 'ME': 'Maine', 'MD': 'Maryland',
'MA': 'Massachusetts', 'MI': 'Michigan', 'MN': 'Minnesota', 'MS': 'Mississippi',
'MO': 'Missouri', 'MT': 'Montana', 'NE': 'Nebraska', 'NV': 'Nevada',
'NH': 'New Hampshire', 'NJ': 'New Jersey', 'NM': 'New Mexico', 'NY': 'New York',
'NC': 'North Carolina', 'ND': 'North Dakota', 'OH': 'Ohio', 'OK': 'Oklahoma',
'OR': 'Oregon', 'PA': 'Pennsylvania', 'RI': 'Rhode Island', 'SC': 'South Carolina',
'SD': 'South Dakota', 'TN': 'Tennessee', 'TX': 'Texas', 'UT': 'Utah',
'VT': 'Vermont', 'VA': 'Virginia', 'WA': 'Washington', 'WV': 'West Virginia',
'WI': 'Wisconsin', 'WY': 'Wyoming', 'DC': 'District of Columbia'
}
scatter_data['State Name'] = scatter_data['State'].map(state_codes)
regions = {
'Midwest': [
'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Michigan', 'Minnesota',
'Missouri', 'Nebraska', 'North Dakota', 'Ohio', 'South Dakota', 'Wisconsin'
],
'West': [
'Alaska', 'California', 'Colorado', 'Hawaii', 'Idaho', 'Montana',
'Nevada', 'Oregon', 'Utah', 'Washington', 'Wyoming'
],
'Southeast': [
'Alabama', 'Arkansas', 'Florida', 'Georgia', 'Kentucky', 'Louisiana',
'Mississippi', 'North Carolina', 'South Carolina', 'Tennessee', 'Virginia',
'West Virginia'
],
'Southwest': [
'Arizona', 'New Mexico', 'Oklahoma', 'Texas'
],
'Northeast': [
'Connecticut', 'Delaware', 'Maine', 'Maryland', 'Massachusetts',
'New Hampshire', 'New Jersey', 'New York', 'Pennsylvania',
'Rhode Island', 'Vermont', 'District of Columbia'
]
}
def assign_region(state_name):
for region, states in regions.items():
if state_name in states:
return region
return 'Unknown'
scatter_data['Region'] = scatter_data['State Name'].apply(assign_region)
measure_name_mapping = {
'READM-30-HF-HRRP': 'Heart Failure',
'READM-30-PN-HRRP': 'Pneumonia',
'READM-30-COPD-HRRP': 'Chronic Obstructive Pulmonary Disease',
'READM-30-AMI-HRRP': 'Acute Myocardial Infarction',
'READM-30-HIP-KNEE-HRRP': 'Hip and Knee Replacements',
'READM-30-CABG-HRRP': 'Coronary Artery Bypass Graft'
}
scatter_data['Measure Name'] = scatter_data['Measure Name'].replace(measure_name_mapping)
def create_region_scatter(region_name):
region_data = scatter_data[scatter_data['Region'] == region_name]
if region_data.empty:
print(f"No data available for region: {region_name}")
return
fig = px.scatter(
region_data,
x='Predicted Readmission Rate',
y='Expected Readmission Rate',
color='Measure Name',
hover_name='Facility Name',
hover_data={
'State Name': True,
'Measure Name': True,
'Predicted Readmission Rate': True,
'Expected Readmission Rate': True,
},
title=f"{region_name} - Predicted vs. Expected Readmission Rates",
labels={
'Predicted Readmission Rate': 'Predicted Rate (%)',
'Expected Readmission Rate': 'Expected Rate (%)',
'Measure Name': 'Condition'
}
)
fig.update_traces(marker=dict(size=10, opacity=0.7))
fig.update_layout(
xaxis_title='Predicted Readmission Rate (%)',
yaxis_title='Expected Readmission Rate (%)',
legend_title='Condition',
plot_bgcolor='rgba(0,0,0,0)',
paper_bgcolor='rgba(255,255,255,1)'
)
fig.show()
for region in regions.keys():
create_region_scatter(region)